/*
This code compares the loan amounts reported in the PPP data with the amounts, possibly reflecting those loans, reported on schedule K of passthrough returns
*/

do "${dodir}/make_globals.do"

//Use schedule K data
//Builds one row per id holding the 2020 and 2021 schedule K amounts and their
//sum, and saves it for the merge with the PPP data.

import delimited "${datadir}/schk_comp/schk_loans", clear

//Columns come in under the generic v# names; name the ones used below
rename v1 yr
//rename v2 ein
rename v3 id
rename v11 untax_inc

//Discard every column that was not renamed above
drop v*

duplicates drop

//Display the overall sum across all years, before any year filter is applied
total untax_inc

//Keep only 2020 and 2021 data for schedule K
//An explicit whitelist is used instead of dropping 2022 only: any other year
//would add extra untax_inc#### columns in the reshape below (and be left out of
//the total), and a missing year would make the reshape fail.
keep if inlist(yr, 2020, 2021)

//Make unique by id and yr
collapse (sum) untax_inc, by(id yr)

//Reshape to make unique by id
reshape wide untax_inc, i(id) j(yr)

//Replace missings with zero (an id present in only one year is missing in the other)
replace untax_inc2020 = 0 if untax_inc2020 == .
replace untax_inc2021 = 0 if untax_inc2021 == .


//Add years together
//Stored as double: the default float type is only exact up to 16,777,216
gen double total = untax_inc2020 + untax_inc2021

save "${datadir}/schk_comp/schk_ppp_rep", replace

//Merge PPP data and schedule K data together
use "${datadir}/ppp_wide", clear

//Keep every id found in the schedule K file, with or without a PPP record;
//ids found only in the PPP data are dropped
merge 1:1 id using "${datadir}/schk_comp/schk_ppp_rep", keep(matched using)

//Ids without a PPP record have missing forgiveness amounts; count them as zero
replace forgivenessamount_first = 0 if forgivenessamount_first == .
replace forgivenessamount_second = 0 if forgivenessamount_second == .

//Stored as double so that dollar sums are not rounded by the default float type
gen double sba_total = forgivenessamount_first + forgivenessamount_second

keep tin dateapproved_first forgivenessamount_first dateapproved_second forgivenessamount_second /// 
	untax_in* total sba_total
	
//Names of the two amounts being compared; the rest of the script refers to these
global sba_var sba_total
global tax_var	total

//Get raw difference
cap drop diff	
gen double diff = ${sba_var} - ${tax_var}

//Make histogram of all and of matched only
histogram diff
histogram diff if ${sba_var} > 0

//Take a percent difference
//(missing whenever the schedule K amount is zero)
cap drop pct_diff
gen double pct_diff = diff / ${tax_var}

//Cap difference at 200%
//Stata orders missing values above every number, so a bare "pct_diff > 2" would
//also turn every division-by-zero result into 200%. Cap real values only...
replace pct_diff = 2 if pct_diff > 2 & !missing(pct_diff)
//...and handle the zero-base case explicitly: a positive SBA amount against a
//zero schedule K amount is an unbounded overshoot and belongs in the top bin,
//while 0/0 is undefined and stays missing.
replace pct_diff = 2 if missing(pct_diff) & ${tax_var} == 0 & diff > 0 & !missing(diff)

//Express as a percentage
replace pct_diff = pct_diff*100

histogram pct_diff, percent

//Make various plots
//All three histograms are restricted to ids with a positive SBA amount and use
//the same bins (width 20 starting at -110); they differ only in the weighting.
//Frequency weights must be integers, so each dollar weight is rounded to whole
//dollars in its own variable. The underlying amounts are left untouched so that
//later steps see the original values.

//Unweighted
histogram pct_diff if ${sba_var} > 0 , percent /// 
	graphregion(color(white)) bgcolor(white) color(navy) ///
	width(20) start(-110) ///
	ylabel(,format(%9.0fc) angle(0) nogrid) ytitle(% of Matched Firms) /// 
	xtitle(% Difference Between SBA Loan Amount and Schedule K Amount)
//Path is quoted so that an output directory containing spaces still works
graph export "${outdir}/schk_comp_unweighted.png", replace width(3000)

//Sch K dollar weighted
cap drop wt_schk
gen double wt_schk = round(${tax_var}, 1)
histogram pct_diff [fw=wt_schk] if ${sba_var} > 0 , percent /// 
	graphregion(color(white)) bgcolor(white) color(navy) ///
	width(20) start(-110) ///
	ylabel(,format(%9.0fc) angle(0) nogrid) ytitle(% of Schedule K dollars) /// 
	xtitle(% Difference Between SBA Loan Amount and Schedule K Amount)
graph export "${outdir}/schk_comp_schk_weighted.png", replace width(3000)

//SBA dollar weighted
cap drop wt_sba
gen double wt_sba = round(${sba_var}, 1)
histogram pct_diff [fw=wt_sba] if ${sba_var} > 0 , percent /// 
	graphregion(color(white)) bgcolor(white) color(navy) ///
	width(20) start(-110) ///
	ylabel(,format(%9.0fc) angle(0) nogrid) ytitle(% of PPP dollars) /// 
	xtitle(% Difference Between SBA Loan Amount and Schedule K Amount)
graph export "${outdir}/schk_comp_sba_weighted.png", replace width(3000)

//Share of ids matched to a loan
//An id counts as matched when its SBA amount is positive

capture drop matched
generate matched = (${sba_var} > 0)
tabulate matched

//Counts behind the within-10% share:
//first the number of matched ids, then the number of ids whose percent
//difference lies strictly between -10 and 10
count if matched == 1
count if pct_diff > -10 & pct_diff < 10


